import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Load the merged dataset once; the plotting code below reads its
# 'Category' and 'Year' columns from `data`.
data = pd.read_csv("C:/NoterDTU/6_semester/Social_data/website_2/merged_data.csv")
# Bar chart: number of 'VEHICLE THEFT' rows per year, leaving out 2025.
crimes = data[['Category', 'Year']]
is_vehicle_theft = crimes['Category'] == 'VEHICLE THEFT'
is_not_2025 = crimes['Year'] != 2025
crimes = crimes[is_vehicle_theft & is_not_2025]

# value_counts() orders by frequency; re-sort by the year label so the
# bars appear in chronological order.
crime_counts = crimes["Year"].value_counts().sort_index()
crime_counts.plot(kind="bar", color="indigo", edgecolor="black")
plt.show()
# Crime categories the analysis focuses on.
focuscrimes = {
    'WEAPON LAWS', 'PROSTITUTION', 'ROBBERY', 'BURGLARY', 'ASSAULT',
    'DRUG/NARCOTIC', 'LARCENY/THEFT', 'VANDALISM', 'VEHICLE THEFT',
    'STOLEN PROPERTY',
}

# Bar chart: number of rows per year across every category in `data`.
data["Year"].value_counts().sort_index().plot(kind="bar", color="indigo", edgecolor="black")
plt.ylabel("Number of crimes")
plt.xlabel("Year")
plt.title("Number of crimes per year (2003-2025)")
# Notebook output: Text(0.5, 1.0, 'Number of crimes per year (2003-2025)')

# Re-draw the yearly vehicle-theft bar chart (rows from 2025 are excluded).
crimes = data[['Category', 'Year']]
crimes = crimes[crimes['Category'].eq('VEHICLE THEFT') & crimes['Year'].ne(2025)]

# Count rows per year and order the result by year rather than by count.
crime_counts = crimes['Year'].value_counts().sort_index()
crime_counts.plot(kind='bar', color='indigo', edgecolor='black')
plt.show()
import pandas as pd
import folium
from folium.plugins import HeatMapWithTime
from IPython.display import display
# Animated heat map of vehicle thefts, one frame per calendar month.

# Load data
df = pd.read_csv("C:/NoterDTU/6_semester/Social_data/website_2/merged_data.csv")

# Keep vehicle thefts from 2003 through 2024 (between() is inclusive).
df_filtered = df[
    (df['Category'] == 'VEHICLE THEFT')
    & (df['Year'].between(2003, 2024))
].copy()

# Report how many records survived the filter
if df_filtered.empty:
    print("Warning: No data after filtering!")
else:
    print(f"Found {len(df_filtered)} records after filtering")

# Extract relevant columns and drop rows with any missing value
df_filtered = df_filtered[['Latitude', 'Longitude', 'Month', 'Year']].dropna()

# Check for valid coordinates
valid_coords = df_filtered[
    df_filtered['Latitude'].between(-90, 90)
    & df_filtered['Longitude'].between(-180, 180)
]
if len(valid_coords) < len(df_filtered):
    print(f"Warning: {len(df_filtered) - len(valid_coords)} records have invalid coordinates")
    # Out-of-range points cannot be drawn meaningfully, so exclude them
    # from the heat map instead of only warning about them.
    df_filtered = valid_coords.copy()

# Define month mapping and order
month_mapping = {
    "January": 1, "February": 2, "March": 3, "April": 4,
    "May": 5, "June": 6, "July": 7, "August": 8,
    "September": 9, "October": 10, "November": 11, "December": 12,
}
month_names = list(month_mapping)

# Create numerical month column (month names not in the mapping become NaN)
df_filtered['MonthNum'] = df_filtered['Month'].map(month_mapping)

# Sort by year and month
df_filtered = df_filtered.sort_values(['Year', 'MonthNum'])

# Group once by (year, month) instead of re-scanning the whole frame for
# each of the 264 frames. Rows whose MonthNum is NaN are left out by
# groupby, just as they never matched the per-month equality test before.
coords_by_period = {
    period: group[['Latitude', 'Longitude']].values.tolist()
    for period, group in df_filtered.groupby(['Year', 'MonthNum'])
}

# Prepare heat data and time index: one entry per month, in chronological
# order. Months without records get an empty list so that heat_data and
# time_index stay aligned.
heat_data = []
time_index = []
for year in range(2003, 2025):
    for month_num, month_name in enumerate(month_names, start=1):
        coords = coords_by_period.get((year, month_num), [])
        heat_data.append(coords)
        time_index.append(f"{month_name} {year}")
        # Print count for debugging
        print(f"{month_name} {year}: {len(coords)} points")

# Only create map if at least one month has points
if any(heat_data):
    # Create base map
    base_map = folium.Map(location=[37.77919, -122.41914], zoom_start=12.5)

    # Add heatmap with time
    HeatMapWithTime(
        heat_data,
        index=time_index,  # Time labels showing month and year
        auto_play=True,
        max_opacity=0.5,
        radius=13,
        min_opacity=0.1,
        gradient={0.2: 'blue', 0.4: 'lime', 0.6: 'orange', 0.8: 'red'},
        display_index=True,
        use_local_extrema=False,
        name="Vehicle Thefts",
        blur=1,
    ).add_to(base_map)

    # Add layer control
    folium.LayerControl().add_to(base_map)

    # Display map
    display(base_map)
# Notebook output: Found 175849 records after filtering
January 2003: 1130 points
February 2003: 1085 points
March 2003: 1406 points
April 2003: 1440 points
May 2003: 1296 points
June 2003: 1219 points
July 2003: 1257 points
August 2003: 1401 points
September 2003: 1373 points
October 2003: 1302 points
November 2003: 1175 points
December 2003: 1192 points
January 2004: 1364 points
February 2004: 1315 points
March 2004: 1482 points
April 2004: 1507 points
May 2004: 1602 points
June 2004: 1439 points
July 2004: 1468 points
August 2004: 1532 points
September 2004: 1404 points
October 2004: 1532 points
November 2004: 1550 points
December 2004: 1621 points
January 2005: 1681 points
February 2005: 1362 points
March 2005: 1473 points
April 2005: 1586 points
May 2005: 1580 points
June 2005: 1385 points
July 2005: 1414 points
August 2005: 1404 points
September 2005: 1420 points
October 2005: 1766 points
November 2005: 1714 points
December 2005: 1318 points
January 2006: 604 points
February 2006: 567 points
March 2006: 562 points
April 2006: 571 points
May 2006: 540 points
June 2006: 650 points
July 2006: 671 points
August 2006: 685 points
September 2006: 570 points
October 2006: 628 points
November 2006: 603 points
December 2006: 612 points
January 2007: 521 points
February 2007: 465 points
March 2007: 511 points
April 2007: 407 points
May 2007: 374 points
June 2007: 486 points
July 2007: 633 points
August 2007: 707 points
September 2007: 690 points
October 2007: 616 points
November 2007: 541 points
December 2007: 493 points
January 2008: 518 points
February 2008: 475 points
March 2008: 525 points
April 2008: 536 points
May 2008: 510 points
June 2008: 426 points
July 2008: 460 points
August 2008: 509 points
September 2008: 579 points
October 2008: 575 points
November 2008: 443 points
December 2008: 486 points
January 2009: 545 points
February 2009: 400 points
March 2009: 458 points
April 2009: 395 points
May 2009: 401 points
June 2009: 393 points
July 2009: 435 points
August 2009: 488 points
September 2009: 433 points
October 2009: 414 points
November 2009: 434 points
December 2009: 374 points
January 2010: 366 points
February 2010: 363 points
March 2010: 351 points
April 2010: 325 points
May 2010: 271 points
June 2010: 366 points
July 2010: 386 points
August 2010: 380 points
September 2010: 403 points
October 2010: 357 points
November 2010: 396 points
December 2010: 372 points
January 2011: 334 points
February 2011: 311 points
March 2011: 460 points
April 2011: 359 points
May 2011: 374 points
June 2011: 348 points
July 2011: 274 points
August 2011: 370 points
September 2011: 453 points
October 2011: 510 points
November 2011: 451 points
December 2011: 499 points
January 2012: 428 points
February 2012: 473 points
March 2012: 451 points
April 2012: 470 points
May 2012: 455 points
June 2012: 496 points
July 2012: 600 points
August 2012: 573 points
September 2012: 547 points
October 2012: 551 points
November 2012: 578 points
December 2012: 552 points
January 2013: 522 points
February 2013: 470 points
March 2013: 454 points
April 2013: 440 points
May 2013: 503 points
June 2013: 513 points
July 2013: 495 points
August 2013: 583 points
September 2013: 698 points
October 2013: 567 points
November 2013: 453 points
December 2013: 536 points
January 2014: 517 points
February 2014: 442 points
March 2014: 527 points
April 2014: 649 points
May 2014: 641 points
June 2014: 682 points
July 2014: 650 points
August 2014: 645 points
September 2014: 495 points
October 2014: 703 points
November 2014: 566 points
December 2014: 584 points
January 2015: 606 points
February 2015: 550 points
March 2015: 626 points
April 2015: 728 points
May 2015: 825 points
June 2015: 716 points
July 2015: 633 points
August 2015: 628 points
September 2015: 627 points
October 2015: 709 points
November 2015: 638 points
December 2015: 649 points
January 2016: 565 points
February 2016: 543 points
March 2016: 508 points
April 2016: 511 points
May 2016: 493 points
June 2016: 477 points
July 2016: 531 points
August 2016: 562 points
September 2016: 482 points
October 2016: 602 points
November 2016: 586 points
December 2016: 557 points
January 2017: 484 points
February 2017: 507 points
March 2017: 571 points
April 2017: 547 points
May 2017: 462 points
June 2017: 461 points
July 2017: 496 points
August 2017: 456 points
September 2017: 434 points
October 2017: 580 points
November 2017: 318 points
December 2017: 389 points
January 2018: 773 points
February 2018: 677 points
March 2018: 749 points
April 2018: 740 points
May 2018: 524 points
June 2018: 471 points
July 2018: 476 points
August 2018: 430 points
September 2018: 438 points
October 2018: 484 points
November 2018: 443 points
December 2018: 455 points
January 2019: 411 points
February 2019: 397 points
March 2019: 405 points
April 2019: 436 points
May 2019: 437 points
June 2019: 448 points
July 2019: 448 points
August 2019: 419 points
September 2019: 471 points
October 2019: 454 points
November 2019: 481 points
December 2019: 464 points
January 2020: 477 points
February 2020: 473 points
March 2020: 511 points
April 2020: 521 points
May 2020: 602 points
June 2020: 669 points
July 2020: 812 points
August 2020: 651 points
September 2020: 526 points
October 2020: 664 points
November 2020: 744 points
December 2020: 763 points
January 2021: 752 points
February 2021: 623 points
March 2021: 591 points
April 2021: 567 points
May 2021: 666 points
June 2021: 600 points
July 2021: 658 points
August 2021: 661 points
September 2021: 607 points
October 2021: 767 points
November 2021: 645 points
December 2021: 701 points
January 2022: 694 points
February 2022: 723 points
March 2022: 643 points
April 2022: 590 points
May 2022: 582 points
June 2022: 636 points
July 2022: 675 points
August 2022: 692 points
September 2022: 730 points
October 2022: 738 points
November 2022: 700 points
December 2022: 677 points
January 2023: 656 points
February 2023: 646 points
March 2023: 730 points
April 2023: 737 points
May 2023: 831 points
June 2023: 755 points
July 2023: 916 points
August 2023: 770 points
September 2023: 770 points
October 2023: 713 points
November 2023: 667 points
December 2023: 610 points
January 2024: 698 points
February 2024: 675 points
March 2024: 592 points
April 2024: 519 points
May 2024: 565 points
June 2024: 616 points
July 2024: 738 points
August 2024: 622 points
September 2024: 586 points
October 2024: 524 points
November 2024: 427 points
December 2024: 465 points
Make this Notebook Trusted to load map: File -> Trust Notebook
from bokeh.io import output_notebook, show
from bokeh.layouts import column
from bokeh.models import Select, Slope, Label, CustomJS, HoverTool
from bokeh.plotting import figure, ColumnDataSource
import numpy as np
import pandas as pd
# Interactive scatter plot comparing the monthly counts of two crime
# categories, with a least-squares regression line and its R² value.
# Two dropdowns choose the categories; a JavaScript callback recomputes
# the regression in the browser whenever either selection changes.

# Configure Bokeh to load silently
output_notebook(hide_banner=True)

# Load and prepare your crime data
df = pd.read_csv("C:/NoterDTU/6_semester/Social_data/website_2/merged_data.csv")

# Define focus crimes
focuscrimes = {
    'WEAPON LAWS', 'PROSTITUTION', 'ROBBERY', 'BURGLARY', 'ASSAULT',
    'DRUG/NARCOTIC', 'LARCENY/THEFT', 'VANDALISM', 'VEHICLE THEFT', 'STOLEN PROPERTY'
}

# Filter and process data: one row per (Year, Month, Category) with its count
df_focus = df[df['Category'].isin(focuscrimes)]
df_focus_grouped = (
    df_focus.groupby(['Year', 'Month', 'Category'])
    .size()
    .reset_index(name='Crime_Count')
)
# Build a timestamp from "<Month> <Year>"; strings that fail to parse
# become NaT and are removed by the dropna() below.
df_focus_grouped['Date'] = pd.to_datetime(
    df_focus_grouped['Month'] + ' ' + df_focus_grouped['Year'].astype(str),
    errors='coerce',
)
df_focus_grouped = df_focus_grouped.dropna()

# Extract month and year for hover tool
df_focus_grouped['Month_Year'] = df_focus_grouped['Date'].dt.strftime('%b %Y')

# Pivot the data: one row per month, one column per crime category
df_pivot = df_focus_grouped.pivot_table(
    index=['Date', 'Month_Year'],
    columns='Category',
    values='Crime_Count',
    fill_value=0,
)
df_pivot['Total Crimes'] = df_pivot.sum(axis=1)
df_pivot.reset_index(inplace=True)

# Prepare plotting data
numeric_cols = [col for col in df_pivot.columns if col not in ['Date', 'Month_Year']]
df_plot = df_pivot[numeric_cols]

# Set initial variables by name. These were previously picked by position
# (numeric_cols[8] and numeric_cols[1]), which presumably resolved to
# these two categories given the alphabetical column order of the pivot.
x_init = 'VEHICLE THEFT'
y_init = 'BURGLARY'
x_data = df_plot[x_init].values
y_data = df_plot[y_init].values

# Calculate initial regression (ordinary least squares, closed form)
n = len(x_data)
x_sum = x_data.sum()
y_sum = y_data.sum()
xy_sum = (x_data * y_data).sum()
x2_sum = (x_data ** 2).sum()
y2_sum = (y_data ** 2).sum()
slope_val = (n * xy_sum - x_sum * y_sum) / (n * x2_sum - x_sum * x_sum)
intercept = (y_sum - slope_val * x_sum) / n
r_value = (n * xy_sum - x_sum * y_sum) / np.sqrt(
    (n * x2_sum - x_sum * x_sum) * (n * y2_sum - y_sum * y_sum)
)
r_squared = r_value ** 2

# Create ColumnDataSource with Month_Year for hover tool
source = ColumnDataSource(df_pivot)

# Create figure with initial axis labels
plot = figure(
    title="Crime Data Correlation Analysis",
    x_axis_label="Number of incidents for X-axis crime type (month,year)",
    y_axis_label="Number of incidents for Y-axis crime type (month,year)",
    tools="pan,wheel_zoom,box_zoom,reset",
    width=750,
    height=550,
    background_fill_color="#f5f5f5",
    toolbar_location="above"
)

# Format plot appearance
plot.title.text_font_size = '16pt'
plot.xaxis.axis_label_text_font_size = "12pt"
plot.yaxis.axis_label_text_font_size = "12pt"
plot.grid.grid_line_alpha = 0.3

# Hover tool showing the month, both selected crime counts and the total.
# The braces in "@{...}" are needed because column names contain spaces
# and slashes.
hover = HoverTool(
    tooltips=[
        ("Time Period", "@Month_Year"),
        (x_init, f"@{{{x_init}}}"),
        (y_init, f"@{{{y_init}}}"),
        ("Total Crimes", "@{Total Crimes}")
    ],
    mode='mouse'
)
plot.add_tools(hover)

# Initial scatter plot
scatter = plot.scatter(x=x_init, y=y_init, source=source, size=10,
                       color="navy", alpha=0.7, line_color="white")

# Dropdown widgets
x_axis = Select(title="X-Axis Crime Type:", value=x_init,
                options=sorted(numeric_cols), width=250)
y_axis = Select(title="Y-Axis Crime Type:", value=y_init,
                options=sorted(numeric_cols), width=250)

# Regression line
slope = Slope(gradient=slope_val, y_intercept=intercept,
              line_color='red', line_dash='dashed', line_width=2.5)
plot.add_layout(slope)

# R² label
r_squared_label = Label(x=70, y=10, x_units='screen', y_units='screen',
                        text=f"R² = {r_squared:.3f}", text_font_size='13px',
                        text_color='red', background_fill_color='white',
                        background_fill_alpha=0.8)
plot.add_layout(r_squared_label)

# JavaScript callback. The axis models and the hover tool are passed in
# explicitly: the callback sets axis_label on the axis objects themselves
# rather than through the plot, and rebuilds the tooltips so they follow
# the currently selected crime types.
callback = CustomJS(args=dict(
    source=source,
    scatter=scatter,
    slope=slope,
    r_squared_label=r_squared_label,
    xaxis=plot.xaxis[0],
    yaxis=plot.yaxis[0],
    hover=hover,
    x_axis=x_axis,
    y_axis=y_axis
), code="""
    const x = x_axis.value;
    const y = y_axis.value;
    const x_data = source.data[x];
    const y_data = source.data[y];

    // Calculate statistics
    let x_sum = 0, y_sum = 0, xy_sum = 0, x2_sum = 0, y2_sum = 0;
    const n = x_data.length;
    for (let i = 0; i < n; i++) {
        x_sum += x_data[i];
        y_sum += y_data[i];
        xy_sum += x_data[i] * y_data[i];
        x2_sum += x_data[i] * x_data[i];
        y2_sum += y_data[i] * y_data[i];
    }

    // Calculate regression parameters
    const slope_val = (n * xy_sum - x_sum * y_sum) / (n * x2_sum - x_sum * x_sum);
    const intercept = (y_sum - slope_val * x_sum) / n;
    const r_value = (n * xy_sum - x_sum * y_sum) /
        Math.sqrt((n * x2_sum - x_sum * x_sum) * (n * y2_sum - y_sum * y_sum));
    const r_squared = r_value * r_value;

    // Update plot elements
    scatter.glyph.x = {field: x};
    scatter.glyph.y = {field: y};
    slope.gradient = slope_val;
    slope.y_intercept = intercept;
    r_squared_label.text = `R² = ${r_squared.toFixed(3)}`;

    // Update axis labels
    xaxis.axis_label = `${x} (Count)`;
    yaxis.axis_label = `${y} (Count)`;

    // Keep the hover tooltips in sync with the selected crime types
    hover.tooltips = [
        ["Time Period", "@Month_Year"],
        [x, `@{${x}}`],
        [y, `@{${y}}`],
        ["Total Crimes", "@{Total Crimes}"]
    ];
""")

# Connect callbacks
x_axis.js_on_change('value', callback)
y_axis.js_on_change('value', callback)

# Layout: dropdowns stacked above the plot
layout = column(
    column(x_axis, y_axis, width=300),
    plot
)

# Show the plot
show(layout)
#yes